Purpose

Use fermented foods Chow-chow, Kimchi, and Kombucha as model systems to understand microbial diversity. These foods can be created in a public science environment to engage the public and teach about food science and microbial ecology.

Set up

Load packages and define ggplot theme settings

Create function to fix NAs at taxonomy levels

fixTaxaNAs <- function(df){
df_fixed <- df %>%
  mutate(Class=case_when(!is.na(Class)~paste0("c_", Class)),
         Order=case_when(!is.na(Order)~paste0("o_", Order)),
         Family=case_when(!is.na(Family)~paste0("f_", Family)),
         Species=case_when(!is.na(Genus)~replace_na(Species, "spp."))) %>%
  mutate(Class=coalesce(Class, Phylum),
         Order=coalesce(Order, Class),
         Family=coalesce(Family, Order),
         Genus=coalesce(Genus, Family)) %>%
  unite(Species2, c(Genus, Species), remove = FALSE, na.rm=TRUE, sep=" ")
return(df_fixed)  
}

Load data

Create shortcuts for substrates

Organize metadata

Experiment Overview

Figure 2 image

Samples and Experimental Design

Clean up recipe labels

chowRecipes <- c("Tomato", "Onion", "Pepper", "Cabbage", "TomatoOnion", "TomatoPepper", "TomatoCabbage", "OnionPepper",  "OnionCabbage", "PepperCabbage", "TomatoOnionPepper", "TomatoOnionCabbage", "TomatoPepperCabbage", "OnionPepperCabbage", "TomatoOnionPepperCabbage", "brine")
chowRecipeLabels <- c("tomato", "onion", "pepper", "cabbage", "tomato & onion", "tomato & pepper", "tomato & cabbage", "onion & pepper",  "onion & cabbage", "pepper & cabbage", "tomato, onion & pepper", "tomato, onion & cabbage", "tomato, pepper & cabbage", "onion, pepper & cabbage", "tomato, onion, pepper & cabbage", "brine")

kimchiRecipes <- c("Cabbage", "Radish")
kimchiRecipeLabels <- c("cabbage", "radish")


teaRecipes <- c("green", "black", "greenblack", "starter", "scoby")
teaRecipeLabels <- c("green tea", "black tea", "green & black teas", "starter", "commercial scoby")
Samples, per experiment, per day
Day N samples
Chow chow
3 23
10 22
Kimchi
3 22
10 22
Kombucha
4 17
8 17

Table S1: List of all samples:

Levels per experiment

## [1] 16  2  5
sampleNtable <- metadata_16S %>%
  filter(Experiment!="Lab", Experiment!="control") %>%
  with_groups(c(Experiment, Plant), summarize, N=n_distinct(JarID)) %>%
  mutate(Plant=factor(Plant, levels=c(chowRecipes, "Radish", teaRecipes), labels = c(chowRecipeLabels, "radish", teaRecipeLabels))) %>%
  arrange(Experiment, Plant) %>%
  select(-Experiment) %>%
  kableExtra::kable(caption = "Table S1. Sample contents", col.names = c("Contents", "N samples")) %>%
  row_spec(0, bold=TRUE) %>%
  kable_classic(html_font = "Arial", full_width=FALSE) %>%
  pack_rows("Chow chow", 1, 16, bold = TRUE) %>%
  pack_rows("Kimchi", 17, 18, bold = TRUE) %>%
  pack_rows("Kombucha", 19, 23, bold = TRUE) 
sampleNtable
Table S1. Sample contents
Contents N samples
Chow chow
tomato 1
onion 1
pepper 1
cabbage 1
tomato & onion 1
tomato & pepper 1
tomato & cabbage 1
onion & pepper 1
onion & cabbage 1
pepper & cabbage 2
tomato, onion & pepper 1
tomato, onion & cabbage 1
tomato, pepper & cabbage 1
onion, pepper & cabbage 2
tomato, onion, pepper & cabbage 6
brine 1
Kimchi
cabbage 12
radish 10
Kombucha
green tea 5
black tea 5
green & black teas 6
starter 1
commercial scoby 1
save_kable(sampleNtable, zoom=5, file = file.path(figure_out, paste0("TableS1_", Sys.Date(), "_sampleNs.png")))
## file:////private/var/folders/pv/v37hw47124qb8x1m1ryyh2fc0000gn/T/RtmptWOpj8/TableS1_2025-02-17_sampleNs11b0f38307106.html screenshot completed
  • Note, kombucha tea and starter samples (not scoby) also sequenced with ITS primers

Sequencing Descriptives

Reassign lactobacilli for emended genus

ASV_16S1 <- ASV_16S0 %>%
  left_join(newLactoNames) %>%
  replace_na(list(newGenus="x", newSpecies="x")) %>% 
  mutate(Genus=case_when(newGenus!="x"~newGenus, # update to new genus name
                         newGenus=="x"~Genus),
         Species=case_when(newGenus!="x"~newSpecies, # update to new species names
                           newGenus=="x"~Species)) %>%
  select(-newGenus, -newSpecies) %>% # remove those columns
  left_join(naLactoSpecSeqs) %>% # update genus names for lactobacilli that were not assigned at the species level
  mutate(Genus=case_when(Genus=="Lactobacillus"&is.na(Species)~newGenus,
                         Genus=="Lactobacillus"&!is.na(Species)~Genus,
                         Genus!="Lactobacillus"~Genus)) %>%
  select(-newGenus)
## Joining with `by = join_by(Genus, Species)`
## Joining with `by = join_by(seq, Genus, Species)`

Filter and clean 16S

nrow(ASV_16S1)
## [1] 578

There are a total of 578 16S ASVs before filtering.
Remove the following:

  • Eukaryotes
  • Chloroplasts
  • Mitochondria

522 ASVs remain after filtering.

Filter dataframes to samples in chow-chow, kimchi, and tea experiments only.

#ASVs that appear in each experiment
experiment_16SASVs_df <-ASV_16S_expts0 %>%
  gather("SampleID", "Abundance", one_of(metadata_16S$SampleID)) %>%
  left_join(metadata_16S, by = "SampleID") %>%
  filter(Abundance>0) %>%
  select(Experiment, seq) %>%
  unique

experiment_16SASVs_list <- experiment_16SASVs_df %>%
  split(.$Experiment) %>%
  map(~.x$seq)

ASV_16S_expts <- ASV_16S_expts0 %>%
  gather("SampleID", "Reads", one_of(metadata_16S$SampleID)) %>%
  left_join(select(metadata_16S, "Experiment", "SampleID"), by = "SampleID") %>%
  right_join(experiment_16SASVs_df, by = c("seq", "Experiment")) # remove ASVs from each experiment that do not appear in respective samples

Filter and clean ITS

ASV_ITS <- ASV_ITS0 %>%
  select(ASVID, seq, Kingdom, Phylum, Class, Order, Family, Genus, Species, one_of(metadata_ITS$SampleID))

There are 16 ITS ASVs.

Phylum Class Order Family Genus Species
p__Ascomycota
p__Ascomycota
p__Ascomycota
p__Ascomycota
p__Ascomycota
p__Ascomycota Saccharomycetes Saccharomycetales Pichiaceae
p__Ascomycota Saccharomycetes Saccharomycetales Pichiaceae
p__Ascomycota Saccharomycetes Saccharomycetales Saccharomycetales_fam_Incertae_sedis Candida inconspicua
p__Ascomycota Saccharomycetes Saccharomycetales Saccharomycetales_fam_Incertae_sedis Candida sake
p__Ascomycota Saccharomycetes Saccharomycetales Pichiaceae Dekkera anomala
p__Ascomycota Saccharomycetes Saccharomycetales Pichiaceae Dekkera anomala
p__Ascomycota Saccharomycetes Saccharomycetales Pichiaceae Dekkera anomala
p__Ascomycota Saccharomycetes Saccharomycetales Pichiaceae Dekkera bruxellensis
p__Ascomycota Saccharomycetes Saccharomycetales Pichiaceae Dekkera bruxellensis
p__Ascomycota Saccharomycetes Saccharomycetales Saccharomycetales_fam_Incertae_sedis Starmerella davenportii
p__Basidiomycota Tremellomycetes Trichosporonales Trichosporonaceae Cutaneotrichosporon curvatus

All ASVs are assigned at the phylum level, and the phyla Ascomycota and Basidiomycota are represented. Most ASVs are Saccharomycetes. No filtering needed

Join tables

ASV_df <- ASV_ITS %>%
  gather("SampleID", "Reads", one_of(metadata_ITS$SampleID)) %>%
  mutate(Experiment="Tea (Fungi)") %>%
  full_join(ASV_16S_expts)
  
metadata_df <- metadata_16S %>%
  full_join(metadata_ITS)

Library sizes

The ranges of library sizes varies by experiment, rarefying should be performed separately for alpha diversity analyses.

The minimum library sizes for rarefying are:

Figure S1: Library Sizes

bottom_row <- plot_grid(ggdraw() + draw_image(file.path(figure_out, "minLibSizeTable.png")), ggplot() + theme_void(),  ncol = 2,  labels = c("B", ""), label_size = 15)

plot_grid(libSizePlot, bottom_row, ncol = 1, rel_widths = 1, rel_heights = c(1,0.3), labels = c("A", "B"), label_size = 15)

#ggsave(filename = file.path(figure_out, paste0(Sys.Date(), "FigureS1_library_sizes.png")))

Species accumulation curves and rarefaction curves

Species accumulation curves

## $`Chow chow`
## NULL
## 
## $Kimchi
## NULL
## 
## $Kombucha
## NULL
## 
## $`Kombucha (fungi)`
## NULL

Rarefaction curve

raremax <- map(asvMatrices, ~min(rowSums(.x))) 

vegan::rarecurve(asvMatrices$`Chow chow`, step = 20, sample = raremax$`Chow chow`, ylab = "ASVs", col = "blue", cex = 0.6, xlim=c(0,4000))
title(main="Chow Chow")

vegan::rarecurve(asvMatrices$Kimchi, step = 20, sample = raremax$Kimchi, ylab = "ASVs", label = FALSE, col = "blue", cex = 0.6, xlim=c(0,4000))
title(main="Kimchi")

vegan::rarecurve(asvMatrices$Kombucha, step = 20, sample = raremax$Kombucha, ylab = "ASVs", col = "blue", cex = 0.6, xlim=c(0,4000))
title(main="Kombucha")

vegan::rarecurve(asvMatrices$`Kombucha (fungi)`, step = 20, sample = raremax$`Kombucha (fungi)`, ylab = "ASVs", col = "blue", cex = 0.6, xlim=c(0,4000))
title(main="Kombucha (fungi)")

ASVs before rarefying

S <- map(asvMatrices, specnumber) # observed number of ASVs per sample
S %>%
  map(~as_tibble(.x, rownames = "SampleID")) %>%
  purrr::reduce(full_join) %>%
  left_join(metadata_df) %>%
  mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
         ControlSample=factor(ControlSample, levels=c("sample", "brine", "starter", "scoby"), labels = c("sample", "brine", "starter", "commercial scoby"))) %>%
  ggplot(aes(x=Experiment, y=value, color=ControlSample)) +
  geom_boxplot(alpha=0, color="gray") +
  geom_quasirandom(alpha=0.7, size=2, width=0.3) +
  scale_color_manual(values=c("#000000", "#56B4E9", "#D55E00", "#009E73")) +
  facet_wrap(~Primers, scales="free") +
  labs(x=NULL, y="N unique ASVs", color="Sample type")+
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank())

Analyses

Data prep

Alpha Diversity

Rarefy to common read depth for each experiment

rarefied_asv_df <- map2(asvMatrices, raremax, rrarefy) %>%
  map(~as_tibble(.x, rownames = "SampleID")) %>%
  map(~gather(.x, "ASVID", "rarefiedReads", 2:ncol(.x))) %>%
  purrr::reduce(full_join) %>%
  left_join(ASV_df)  %>%
  left_join(metadata_df)

Rarefied richness vs. observed richness

rarefied_asv_df %>%
  group_by(Experiment, SampleID) %>%
  summarize_at(vars("Reads", "rarefiedReads"), ~sum(.x>0)) %>%
  mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)"))) %>%
  ggplot(aes(x=Reads, y=rarefiedReads)) +
  geom_abline(yintercept=0, slope = 1, color="gray", linetype=2) +
  geom_point(shape=1) +
  facet_wrap(~Experiment, scales="free") +
  theme(aspect.ratio = 1) +
  labs(x="Observed no. of ASVs", y="Rarefied no. of ASVs")

Rarefying doesn’t impact richness much. Largest impact is in kimchi samples

Beta Diversity

Do not rarefy (McMurdie & Holmes, 2014) Normalize by calculating relative abundance Make ASV counts proportions

ASV_df_prop <- ASV_df %>%
  with_groups(SampleID, mutate, Abundance=Reads/sum(Reads))

Descriptives

ASVs per experiment:
Experiment No. unique genera No. unique ASVs
Chow chow 80 228
Kimchi 39 190
Kombucha 48 95
Kombucha (fungi) 4 12

Table of top taxa

ASV_df_prop %>%
  group_by(Experiment, ASVID, Family, Genus, Species) %>%
  summarize(`Mean abundance`=mean(Abundance)) %>%
  replace_na(list(Genus="", Species="")) %>%
  group_by(Experiment) %>%
  top_n(4, `Mean abundance`) %>%
  mutate(`Mean abundance`=round(`Mean abundance`, 2)) %>%
  arrange(Experiment, -`Mean abundance`) %>%
  ungroup %>%
  select(Family, Genus, Species, `Mean abundance`) %>%
  kableExtra::kable() %>%
  row_spec(0, bold=TRUE) %>%
  kable_classic(html_font = "Arial", full_width=FALSE) %>%
  pack_rows("Chow chow", 1, 4, bold = TRUE) %>%
  pack_rows("Kimchi", 5, 8, bold = TRUE) %>%
  pack_rows("Kombucha", 9, 12, bold = TRUE) %>%
  pack_rows("Kombucha (fungi)", 13, 16, bold = TRUE)
Family Genus Species Mean abundance
Chow chow
Streptococcaceae Lactococcus lactis 0.26
Enterobacteriaceae Klebsiella 0.22
Leuconostocaceae Weissella cibaria 0.12
Leuconostocaceae Leuconostoc lactis 0.08
Kimchi
Lactobacillaceae Latilactobacillus sakei 0.39
Leuconostocaceae Weissella koreensis 0.13
Enterobacteriaceae Klebsiella 0.05
Lactobacillaceae 0.04
Kombucha
Acetobacteraceae Komagataeibacter 0.60
Acetobacteraceae Komagataeibacter hansenii 0.29
Acetobacteraceae Komagataeibacter 0.05
Acetobacteraceae Komagataeibacter 0.03
Kombucha (fungi)
Pichiaceae Dekkera bruxellensis 0.98
Pichiaceae Dekkera anomala 0.01
Pichiaceae Dekkera anomala 0.01
Pichiaceae Dekkera anomala 0.00

Stacked bar plot

# get top Species and percents of species not in top 
top_species <- ASV_df_prop %>%
  fixTaxaNAs() %>%
  with_groups(c(SampleID, Experiment, Species2), summarize, Abundance=sum(Abundance)) %>%
  with_groups(c(Experiment, Species2), summarize, `Mean abundance`=mean(Abundance)) %>%
  group_by(Experiment) %>%
  top_n(9, `Mean abundance`) %>%
  select(Experiment, Species2) %>%
  filter(Species2!="p__Ascomycota")

ASV_df_prop %>%
  fixTaxaNAs() %>%
  with_groups(c(Experiment, Species2, SampleID), summarize, Abundance=sum(Abundance)) %>%
  right_join(top_species) %>%
  with_groups(c(Experiment, SampleID), summarize, x=sum(Abundance)) %>%
  with_groups(Experiment, summarize, `Percent of reads`=mean(x))
## # A tibble: 4 × 2
##   Experiment  `Percent of reads`
##   <chr>                    <dbl>
## 1 Chow                     0.858
## 2 Kimchi                   0.892
## 3 Tea                      0.999
## 4 Tea (Fungi)              1.00
other <- ASV_df_prop %>%
  fixTaxaNAs() %>%
  right_join(top_species) %>%
  group_by(SampleID) %>%
  mutate(Abundance=1-sum(Abundance)) %>%
  select(SampleID, Experiment, Abundance) %>%
  unique %>%
  mutate(Species2="Other")
# make factors for ordering samples
chowJars <- c( "brine", "T", "O", "P", "C", "TO", "TP", "TC", "OP1", "OC", "PC1", "PC2", "TOP", "TOC", "TPC", "OPC1",  "OPC2",  "TOPC1", "TOPC2", "TOPC3", "TOPC4", "TOPC5", "TOPC6")
kimchiJars <- c("KC1", "KC2", "KC3", "KC4", "KC5",  "KC6",  "KC7",  "KC8",  "KC9",  "KC10", "KC11", "KC12", "KR1",  "KR2",  "KR3",  "KR4",  "KR5",  "KR6",  "KR7",  "KR8",  "KR9", "KR10")
teaJars <- c("G2", "G3", "G4", "G5", "G6", "B2", "B3", "B4", "B5", "B6", "GB2", "GB3", "GB4", "GB5", "GB6", "GB7", "starter", "SCOBYPeak")

Figure 3: Microbial communities with species absolute abundance

chowColors <- c("gray40","#CC79A7", "#0072B2",  "#D55E00",  "#56B4E9", "#009E73",  "#E69F00", "darkslateblue", "#F0E442",  "darkgreen")
kimchiColors <- c("gray40", "#CC79A7", "darkblue", "khaki1", "mediumpurple4", "#009E73", "darkred", "lightskyblue3", "darkgreen", "salmon1")
kombuchaColors <- c("gray40", "#CC79A7", "indianred", "darkgoldenrod", "darkcyan", "darkblue","#0072B2", "mediumpurple4", "lightgoldenrod", "darkred")
kombuchaFungiColors <- c("gray40", "forestgreen", "chocolate3", "mediumorchid3", "blue3", "burlywood", "olivedrab4", "lightsteelblue")

barPlotColors <- list(chowColors,
                   kimchiColors, 
                   kombuchaColors,
                   kombuchaFungiColors)

bar_plots <- ASV_df_prop %>%
  select(-Reads) %>%
  fixTaxaNAs() %>%
  right_join(top_species) %>%
  full_join(other) %>%
  left_join(metadata_df) %>%
  with_groups(Experiment, mutate, DayO=dense_rank(DayN)) %>%
  mutate(DayO=factor(DayO, levels=c(1, 2), labels=c("First", "Second")),
        JarID=factor(JarID, levels=c(chowJars, kimchiJars, teaJars)),
        Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)"))) %>%
  replace_na(list(DayO="First")) %>%
  split(.$Experiment) %>%
  map(~mutate(.x, Species2=factor(Species2, levels = c("Other", sort(unique(grep("_|Other", Species2, value = TRUE, invert = TRUE))), sort(unique(grep("_", Species2, value = TRUE))))))) %>%
  map(~mutate(.x, Species3=case_when(str_detect(Species2, "_|Other")~Species2, 
                                     !str_detect(Species2, "_|Other|spp")~paste0("*", Species2, "*"),
                                     Species=="spp."~paste(paste0("*", Genus, "*"), Species)))) %>%
  map(~mutate(.x, Species3=factor(Species3, levels = c("Other", sort(unique(grep("_|Other", Species3, value = TRUE, invert = TRUE))), sort(unique(grep("_", Species3, value = TRUE))))))) %>%
  map2(., barPlotColors, ~ggplot(.x, aes(x=JarID, y=Abundance, fill=Species2)) +
        geom_bar(stat="identity") +
        facet_grid(DayO~Experiment, scales = "free_x") +
        scale_fill_manual(values=.y, labels=levels(.$Species3)) +
        theme(axis.text.x = element_text(angle=45, hjust=1, size=6),
        axis.text.y = element_blank(),
        legend.position = "bottom",
        legend.direction = "vertical",
        legend.text = element_markdown(),
        strip.text.y = element_blank()) +
        labs(x=NULL, y=NULL, fill="Species"))

plot_grid(bar_plots$`Chow chow` + theme(axis.text.y = element_text()) + labs(y="Relative abundance"),
          bar_plots$Kimchi,
          bar_plots$Kombucha,
          bar_plots$`Kombucha (fungi)` + theme(strip.text.y = element_text()),
          nrow=1,
          rel_widths = c(length(chowJars), length(kimchiJars), length(teaJars), length(teaJars)-1),
          align = "h",
          axis = "bt")

#ggsave(filename = file.path(figure_out, paste0(Sys.Date(), "_Figure3_abundance_bar_plot.png")))

Komagataeibacter proportion of fungi in kombucha samples

ASV_df_prop %>%
  filter(Genus=="Komagataeibacter",
         Experiment=="Tea") %>%
  left_join(metadata_df[,c("SampleID", "Day")]) %>%
  with_groups(c(SampleID, Day), summarize, Abundance=sum(Abundance)) %>%
  summarize(mean(Abundance))
## # A tibble: 1 × 1
##   `mean(Abundance)`
##               <dbl>
## 1             0.998

Dekkera bruxellensis proportion of fungi in kombucha samples

ASV_df_prop %>%
  filter(Genus=="Dekkera", 
         Species=="bruxellensis",
         Experiment=="Tea (Fungi)") %>%
  left_join(metadata_df[,c("SampleID", "Day")]) %>%
  with_groups(c(SampleID, Day), summarize, Abundance=sum(Abundance)) %>%
  with_groups(Day, summarize, mean(Abundance))
## # A tibble: 2 × 2
##   Day   `mean(Abundance)`
##   <chr>             <dbl>
## 1 eight             0.981
## 2 four              0.983

Does diversity increase with the number of plants?

Alpha Diversity

Does alpha diversity increase as the number of plants in the recipes increases?
+ Compare the samples from day 10 only for chow chow and day 8 for kombucha. Cannot use both days in the same statistical test because they are from the same sample. + Does alpha diversity increase as the number of plants increases from 1 to 4? Test with Kruskall-Wallace for chow chow and Wilcoxon ranked sum for kombucha.

diversity_df <- rarefied_asv_df %>%
  filter(!is.na(DayN)) %>%
  select(SampleID, Experiment, ASVID, rarefiedReads) %>%
  nest(ASVmatrix=c("SampleID", "ASVID", "rarefiedReads")) %>%
  mutate(ASVmatrix=map(ASVmatrix, ~spread(.x, ASVID, rarefiedReads)), #make a column of ASV matrices for vegan calculations
         ASVmatrix=map(ASVmatrix, ~column_to_rownames(.x, var="SampleID")),
         ASVmatrix=map(ASVmatrix, as.matrix)) %>%
 mutate(richness=map(ASVmatrix, vegan::specnumber),
        richness=map(richness, ~as_tibble(.x, rownames = "SampleID")),
        shannon=map(ASVmatrix, ~vegan::diversity(.x, "shannon")),
        shannon=map(shannon, ~as_tibble(.x, rownames = "SampleID"))) %>%
  select(Experiment, richness, shannon) %>%
  unnest(cols = c(richness, shannon), names_sep= "_") %>%
  select(-shannon_SampleID) %>%
  dplyr::rename(SampleID=richness_SampleID) %>%
  left_join(metadata_df, by=c("Experiment", "SampleID"))

Perform statistical tests

# Kruskall wallace for chow chow
diversity_df %>%
  filter(Experiment=="Chow", 
         DayN==10, # use second day only
         ControlSample=="sample") %>% 
  mutate(nPlants=as.character(nPlants)) %>%
  gather("measure", "value", c(richness_value, shannon_value)) %>%
  group_by(measure, Experiment) %>%
  kruskal_test(value~nPlants)
## # A tibble: 2 × 8
##   Experiment measure        .y.       n statistic    df     p method        
## * <chr>      <chr>          <chr> <int>     <dbl> <int> <dbl> <chr>         
## 1 Chow       richness_value value    22      5.99     3 0.112 Kruskal-Wallis
## 2 Chow       shannon_value  value    22      4.79     3 0.188 Kruskal-Wallis
# not significant

# wilcox for tea
diversity_df %>%
  filter(Experiment %in% c("Tea", "Tea (Fungi)"), 
         DayN==8, # use second day only
         ControlSample=="sample") %>% 
  mutate(nPlants=as.character(nPlants)) %>%
  gather("measure", "value", c(richness_value, shannon_value)) %>%
  group_by(measure, Experiment) %>%
  wilcox_test(value~nPlants)
## # A tibble: 4 × 9
##   Experiment  measure        .y.   group1 group2    n1    n2 statistic     p
## * <chr>       <chr>          <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>
## 1 Tea         richness_value value 1      2         10     6      33.5 0.743
## 2 Tea (Fungi) richness_value value 1      2         10     6      35   0.571
## 3 Tea         shannon_value  value 1      2         10     6      28   0.875
## 4 Tea (Fungi) shannon_value  value 1      2         10     6      29   0.958
# no significant differences

Figure 4: Impacts of substrate number on alpha diversity

richness_nPlant_plot <- diversity_df %>%
  filter(Experiment %in% c("Chow", "Tea", "Tea (Fungi)"), 
         DayN %in% c(8, 10),
         ControlSample=="sample") %>%
  mutate(Experiment=factor(Experiment, levels=c("Chow", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kombucha", "Kombucha (fungi)")),
         nPlants=as.character(nPlants)) %>%
  ggplot(aes(x=nPlants, y=richness_value, group=nPlants)) +
  geom_boxplot(alpha=0) +
  geom_point(alpha=0.5) +
  facet_wrap(~Experiment, scales = "free") +
  labs(x=NULL, y="Richness")+
  scale_y_continuous(limits = c(0, NA)) +
  theme(axis.text.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank())

shannon_nPlant_plot <- diversity_df %>%
  filter(Experiment %in% c("Chow", "Tea", "Tea (Fungi)"), 
         DayN %in% c(8, 10),
         ControlSample=="sample") %>%
  mutate(Experiment=factor(Experiment, levels=c("Chow", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kombucha", "Kombucha (fungi)")),
         nPlants=as.character(nPlants)) %>%
  ggplot(aes(x=nPlants, y=shannon_value, group=nPlants)) +
  geom_boxplot(alpha=0) +
  geom_point(alpha=0.5) +
  scale_y_continuous(limits = c(0, NA)) + 
  facet_wrap(~Experiment, scales = "free") +
  theme(strip.text.x = element_blank()) +
  labs(x="No. of plant substrates", y="Shannon index")+
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank())

plot_grid(richness_nPlant_plot, shannon_nPlant_plot, nrow=2)

#ggsave(filename = file.path(figure_out, paste0(Sys.Date(), "Figure4_alpha_vs_nsubstrates.png")))

Does the substrate impact diversity?

Alpha diversity

Perform statistical tests

# Kruskall wallace for Tea
alpha_by_substrate_tea <- diversity_df %>%
  filter(Experiment %in% c("Tea", "Tea (Fungi)"), 
         DayN==8,
         ControlSample=="sample") %>%
  mutate(Plant=factor(Plant, levels=c("green", "black", "greenblack"), 
                            labels=c("Green", "Black", "Green & Black"))) %>%
  gather("measure", "value", c(richness_value, shannon_value)) %>%
  group_by(measure, Experiment) %>%
  kruskal_test(value~Plant)
alpha_by_substrate_tea
## # A tibble: 4 × 8
##   Experiment  measure        .y.       n statistic    df      p method        
## * <chr>       <chr>          <chr> <int>     <dbl> <int>  <dbl> <chr>         
## 1 Tea         richness_value value    16     5.80      2 0.0551 Kruskal-Wallis
## 2 Tea (Fungi) richness_value value    16     0.610     2 0.737  Kruskal-Wallis
## 3 Tea         shannon_value  value    16     4.85      2 0.0884 Kruskal-Wallis
## 4 Tea (Fungi) shannon_value  value    16     0.294     2 0.863  Kruskal-Wallis
# not significant

# wilcox for kimchi
alpha_by_substrate_kimchi <- diversity_df %>%
  filter(Experiment=="Kimchi", 
         DayN==10,
         ControlSample=="sample") %>%
  mutate(Plant=factor(Plant, levels=c("Cabbage", "Radish"), 
                             labels=c("Cabbage", "Radish"))) %>%
  gather("measure", "value", c(richness_value, shannon_value)) %>%
  group_by(measure, Experiment) %>%
  wilcox_test(value~Plant, alternative="two.sided") %>%
  adjust_pvalue(p.col="p", method = "BH")
alpha_by_substrate_kimchi
## # A tibble: 2 × 10
##   Experiment measure   .y.   group1 group2    n1    n2 statistic       p   p.adj
##   <chr>      <chr>     <chr> <chr>  <chr>  <int> <int>     <dbl>   <dbl>   <dbl>
## 1 Kimchi     richness… value Cabba… Radish    12    10       106 0.00257 0.00514
## 2 Kimchi     shannon_… value Cabba… Radish    12    10        94 0.0249  0.0249
# Both significant

Plot

richness_Plant_plot <- diversity_df %>%
  filter(Experiment %in% c("Kimchi", "Tea", "Tea (Fungi)"), DayN %in% c(8, 10), ControlSample=="sample") %>%
  mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
         Plant=factor(Plant, levels=c("Cabbage", "Radish", "green", "black", "greenblack"), labels=c("Cabbage", "Radish", "Green", "Black", "Green & Black")),
         plabel=case_when(Experiment=="Kimchi"~"*",
                          Experiment!="Kimchi"~NA)) %>%
  with_groups(Experiment, mutate, py=max(richness_value)) %>%
  ggplot(aes(x=Plant, y=richness_value, group=Plant, label=plabel)) +
  geom_boxplot(alpha=0) +
  geom_point(alpha=0.5) +
  geom_text(aes(x=1.5, y=py-2), size=5) +
  facet_wrap(~Experiment, scales = "free") +
  scale_y_continuous(limits = c(0, NA)) +
  theme(axis.text.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank()) +
  labs(x=NULL, y="Richness")

shannon_Plant_plot <- diversity_df %>%
  filter(Experiment %in% c("Kimchi", "Tea", "Tea (Fungi)"), DayN %in% c(8, 10), ControlSample=="sample") %>%
  mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
         Plant=factor(Plant, levels=c("Cabbage", "Radish", "green", "black", "greenblack"), labels=c("Cabbage", "Radish", "Green", "Black", "Green\n&\nBlack")),
         plabel=case_when(Experiment=="Kimchi"~"*",
                          Experiment!="Kimchi"~NA)) %>%
  with_groups(Experiment, mutate, py=max(shannon_value)) %>%
  ggplot(aes(x=Plant, y=shannon_value, group=Plant, label=plabel)) +
  geom_boxplot(alpha=0) +
  geom_point(alpha=0.5) +
  geom_text(aes(x=1.5, y=py-0.3), size=5) +
  facet_wrap(~Experiment, scales = "free") +
  scale_y_continuous(limits = c(0, NA)) +
  theme(strip.text.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank()) +
  labs(x="Substrate", y="Shannon index")
                      
substrate_alpha_plot <- plot_grid(richness_Plant_plot, shannon_Plant_plot, nrow=2, rel_heights = c(1, 1.27))

Beta diversity

Calculate Bray-Curtis distances and visualize with NMDS Substrates (second day only)

set.seed(123)

nmds <- ASV_df_prop %>%
  left_join(metadata_df) %>%
  filter(Experiment %in% c("Kimchi", "Tea", "Tea (Fungi)"),
         ControlSample=="sample",
         DayN %in% c(8, 10)) %>%
  select(SampleID, Experiment, ASVID, Abundance) %>%
  split(.$Experiment) %>%
  map(~select(.x, -Experiment)) %>%
  map(~spread(.x, ASVID, Abundance)) %>%
  map(~column_to_rownames(.x, var = "SampleID")) %>%
  map(as.matrix) %>%
  map(~metaMDS(.x, distance = "bray"))
## Run 0 stress 0.1208933 
## Run 1 stress 0.1208933 
## ... New best solution
## ... Procrustes: rmse 1.808388e-05  max resid 4.655066e-05 
## ... Similar to previous best
## Run 2 stress 0.127527 
## Run 3 stress 0.09663717 
## ... New best solution
## ... Procrustes: rmse 0.1259782  max resid 0.4879174 
## Run 4 stress 0.1208933 
## Run 5 stress 0.1287598 
## Run 6 stress 0.1268688 
## Run 7 stress 0.1229075 
## Run 8 stress 0.1289299 
## Run 9 stress 0.1287597 
## Run 10 stress 0.1241837 
## Run 11 stress 0.127527 
## Run 12 stress 0.1144112 
## Run 13 stress 0.1296422 
## Run 14 stress 0.1241837 
## Run 15 stress 0.1268687 
## Run 16 stress 0.1229076 
## Run 17 stress 0.1199214 
## Run 18 stress 0.1227962 
## Run 19 stress 0.09663717 
## ... Procrustes: rmse 8.986313e-06  max resid 2.542821e-05 
## ... Similar to previous best
## Run 20 stress 0.1208933 
## *** Best solution repeated 1 times
## Run 0 stress 0.01126436 
## Run 1 stress 0.01126439 
## ... Procrustes: rmse 3.331307e-05  max resid 0.0001063497 
## ... Similar to previous best
## Run 2 stress 0.01126436 
## ... New best solution
## ... Procrustes: rmse 9.097059e-05  max resid 0.0002582499 
## ... Similar to previous best
## Run 3 stress 0.04072022 
## Run 4 stress 0.04011555 
## Run 5 stress 0.01126431 
## ... New best solution
## ... Procrustes: rmse 5.095299e-05  max resid 0.0001061652 
## ... Similar to previous best
## Run 6 stress 0.01126432 
## ... Procrustes: rmse 4.954176e-05  max resid 0.0001633399 
## ... Similar to previous best
## Run 7 stress 0.0112643 
## ... New best solution
## ... Procrustes: rmse 8.783971e-06  max resid 2.86955e-05 
## ... Similar to previous best
## Run 8 stress 0.01784174 
## Run 9 stress 0.04072026 
## Run 10 stress 0.01126438 
## ... Procrustes: rmse 8.653706e-05  max resid 0.0002597349 
## ... Similar to previous best
## Run 11 stress 0.01784186 
## Run 12 stress 0.01126439 
## ... Procrustes: rmse 8.987895e-05  max resid 0.0002412924 
## ... Similar to previous best
## Run 13 stress 0.04072023 
## Run 14 stress 0.3063679 
## Run 15 stress 0.01784179 
## Run 16 stress 0.04072018 
## Run 17 stress 0.01126439 
## ... Procrustes: rmse 0.0001018014  max resid 0.000343954 
## ... Similar to previous best
## Run 18 stress 0.01126439 
## ... Procrustes: rmse 0.0001002056  max resid 0.0003355294 
## ... Similar to previous best
## Run 19 stress 0.01784187 
## Run 20 stress 0.01126433 
## ... Procrustes: rmse 5.383372e-05  max resid 0.000182465 
## ... Similar to previous best
## *** Best solution repeated 6 times
## Run 0 stress 0.001558399 
## Run 1 stress 0.002553419 
## Run 2 stress 0.003591747 
## Run 3 stress 0.005606527 
## Run 4 stress 0.005925471 
## Run 5 stress 0.003227221 
## Run 6 stress 0.007715555 
## Run 7 stress 0.005462563 
## Run 8 stress 0.006489388 
## Run 9 stress 0.003718972 
## Run 10 stress 0.006054452 
## Run 11 stress 0.003266549 
## Run 12 stress 0.005830314 
## Run 13 stress 0.002795648 
## Run 14 stress 0.007130729 
## Run 15 stress 0.003833059 
## Run 16 stress 0.003509317 
## Run 17 stress 0.007856374 
## Run 18 stress 0.005312844 
## Run 19 stress 0.006906621 
## Run 20 stress 0.003073519 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: no. of iterations >= maxit
data.scores <- nmds %>%
  map(~scores(.x)$sites) %>%
  map(as.data.frame) %>%
  map(~rownames_to_column(.x, var="SampleID")) %>%
  map2(., names(.), ~mutate(.x, Experiment=.y)) %>%
  purrr::reduce(full_join) %>%
  left_join(metadata_df) %>%
  mutate(Plant=factor(Plant, levels=c("Cabbage", "Radish", "green", "black", "greenblack"), labels=c("Cabbage", "Radish", "Green", "Black", "Green & Black")))

plantNMDSKimchi <- data.scores %>%
  filter(Experiment=="Kimchi") %>%
  ggplot(aes(x=NMDS1, y=NMDS2, color=Plant)) +
  geom_point() +
  facet_wrap(~Experiment, scales = "free") +
  scale_color_manual(values=c("#0072B2", "#D55E00")) +
  theme(legend.position = "none") +
  labs(x=NULL, y=NULL)

kimchiLegend <- get_legend(
  plantNMDSKimchi +
  theme(legend.position = "bottom",
        legend.direction = "vertical",
        legend.justification = c(0,1)) +
  labs(color="Kimchi substrates"))

plantNMDSTea3 <- data.scores %>%
  filter(Experiment=="Tea") %>%
  mutate(Experiment="Kombucha: all conditions") %>%
  ggplot(aes(x=NMDS1, y=NMDS2, color=Plant)) +
  geom_point() +
  facet_wrap(~Experiment, scales = "free") +
  scale_color_manual(values=c("#009E73","#000000", "#CC79A7")) +
  theme(legend.position = "none") +
  labs(y=NULL, x=NULL)
  
teaLegend <- get_legend(
  plantNMDSTea3 +
  #guides(color=guide_legend(ncol = 2)) +
  theme(legend.position = "bottom",
        legend.direction = "vertical",
        legend.justification = c(0,1)) +
  labs(color="Kombucha substrates"))


plantNMDSTeaITS <- data.scores %>%
  filter(Experiment=="Tea (Fungi)") %>%
  mutate(Experiment="Kombucha (fungi)") %>%
  ggplot(aes(x=NMDS1, y=NMDS2, color=Plant)) +
  geom_point() +
  facet_wrap(~Experiment, scales = "free") +
  scale_color_manual(values=c("#009E73","#000000", "#CC79A7")) +
  theme(legend.position = "none",
        axis.text.x = element_text(size=10)) +
  labs(y=NULL, x=NULL)

#tea with only green or black samples only
nmds <- ASV_df_prop %>%
  left_join(metadata_df) %>%
  filter(Experiment %in% c("Tea", "Tea (Fungi)"),
         ControlSample=="sample",
         DayN==8,
         Plant %in% c("green", "black")) %>%
  select(SampleID, Experiment, ASVID, Abundance) %>%
  split(.$Experiment) %>%
  map(~select(.x, -Experiment)) %>%
  map(~spread(.x, ASVID, Abundance)) %>%
  map(~column_to_rownames(.x, var = "SampleID")) %>%
  map(as.matrix) %>%
  map(~metaMDS(.x, distance = "bray"))
## Run 0 stress 0.0001979636 
## Run 1 stress 0.002999503 
## Run 2 stress 0.001481388 
## Run 3 stress 0.003854223 
## Run 4 stress 0.0008800118 
## Run 5 stress 0.004516088 
## Run 6 stress 0.00161516 
## Run 7 stress 0.004567798 
## Run 8 stress 0.003015539 
## Run 9 stress 0.004088506 
## Run 10 stress 0.001841872 
## Run 11 stress 0.001527751 
## Run 12 stress 0.267585 
## Run 13 stress 0.2909045 
## Run 14 stress 0.001617847 
## Run 15 stress 0.003780789 
## Run 16 stress 0.004549937 
## Run 17 stress 0.002416143 
## Run 18 stress 0.001824797 
## Run 19 stress 0.003793538 
## Run 20 stress 0.002425196 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     17: no. of iterations >= maxit
##      2: stress ratio > sratmax
##      1: scale factor of the gradient < sfgrmin
## Run 0 stress 0 
## Run 1 stress 9.717209e-05 
## ... Procrustes: rmse 0.1368804  max resid 0.2899485 
## Run 2 stress 9.775551e-05 
## ... Procrustes: rmse 0.04444806  max resid 0.07970759 
## Run 3 stress 8.986117e-05 
## ... Procrustes: rmse 0.06600979  max resid 0.1072668 
## Run 4 stress 0.0002962278 
## ... Procrustes: rmse 0.05635873  max resid 0.1190616 
## Run 5 stress 9.667155e-05 
## ... Procrustes: rmse 0.06736211  max resid 0.1480469 
## Run 6 stress 8.90455e-05 
## ... Procrustes: rmse 0.1504335  max resid 0.3254534 
## Run 7 stress 0.0004860919 
## ... Procrustes: rmse 0.06615963  max resid 0.1073563 
## Run 8 stress 9.295456e-05 
## ... Procrustes: rmse 0.1152558  max resid 0.2325063 
## Run 9 stress 8.521991e-05 
## ... Procrustes: rmse 0.03167347  max resid 0.05122187 
## Run 10 stress 9.775636e-05 
## ... Procrustes: rmse 0.06554206  max resid 0.1401172 
## Run 11 stress 9.809775e-05 
## ... Procrustes: rmse 0.05452494  max resid 0.1284415 
## Run 12 stress 0.0007590797 
## Run 13 stress 9.979202e-05 
## ... Procrustes: rmse 0.0767341  max resid 0.1528814 
## Run 14 stress 9.908352e-05 
## ... Procrustes: rmse 0.04052691  max resid 0.07716616 
## Run 15 stress 9.984045e-05 
## ... Procrustes: rmse 0.06071516  max resid 0.124504 
## Run 16 stress 0.0002293478 
## ... Procrustes: rmse 0.05709814  max resid 0.1424378 
## Run 17 stress 0.0002877755 
## ... Procrustes: rmse 0.06256337  max resid 0.1164542 
## Run 18 stress 9.703947e-05 
## ... Procrustes: rmse 0.04414064  max resid 0.09118404 
## Run 19 stress 9.681282e-05 
## ... Procrustes: rmse 0.04899204  max resid 0.07891642 
## Run 20 stress 9.904094e-05 
## ... Procrustes: rmse 0.05947261  max resid 0.1215823 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      5: no. of iterations >= maxit
##     15: stress < smin
data.scores <- nmds %>%
  map(~scores(.x)$sites) %>%
  map(as.data.frame) %>%
  map(~rownames_to_column(.x, var="SampleID")) %>%
  map2(., names(.), ~mutate(.x, Experiment=.y)) %>%
  purrr::reduce(full_join) %>%
  left_join(metadata_df) %>%
  mutate(Plant=factor(Plant, levels=c("green", "black"), labels=c("Green", "Black")))

plantNMDSTea2 <- data.scores %>%
  filter(Experiment=="Tea") %>%
  mutate(Experiment="Kombucha: green OR black") %>%
  ggplot(aes(x=NMDS1, y=NMDS2, color=Plant)) +
  geom_point() +
  facet_wrap(~Experiment, scales = "free") +
  scale_color_manual(values=c("#009E73","#000000")) +
  theme(legend.position = "none",
        strip.text.x = element_text(size=9)) +
  labs(x=NULL, y=NULL)

plantNMDSTeaITS2 <- data.scores %>%
  filter(Experiment=="Tea (Fungi)") %>%
  mutate(Experiment="Kombucha (f): green OR black") %>%
  ggplot(aes(x=NMDS1, y=NMDS2, color=Plant)) +
  geom_point() +
  facet_wrap(~Experiment, scales = "free") +
  scale_color_manual(values=c("#009E73","#000000")) +
  theme(legend.position = "none",
        axis.text.x = element_text(size=10),
        strip.text.x = element_text(size=8)) +
  labs(x=NULL, y=NULL)

plot

legends <- plot_grid(kimchiLegend, teaLegend, theme_void(), ncol=1, rel_heights = c(1, 1, 0.5))
row1 <- plot_grid(plantNMDSKimchi, plantNMDSTea3, plantNMDSTea2, nrow=1)
row2 <- plot_grid(plantNMDSTeaITS, plantNMDSTeaITS2, legends, nrow=1, rel_widths = c(1, 1, 0.7))
row12 <- plot_grid(row1, add_sub(row2, "NMDS1"), ncol=1, rel_heights = c(1, 1.1))
substrate_beta_plot <- plot_grid(add_sub(plot_grid(ggplot+theme_void()+theme(panel.grid.minor = element_blank()), row12, nrow=1, rel_widths = c(0.05,1)), "NMDS2",  angle=90, x=0.05, y=3.9))

PERMANOVA

#tea grean, black, and green and black
bray_by_plant_data  <- metadata_df %>%
  filter(Experiment %in% c("Tea", "Tea (Fungi)"),
         ControlSample=="sample",
         DayN %in% c(8, 10)) %>%
  column_to_rownames(var="SampleID") %>%
  split(.$Experiment)

bray_by_plant_asvs <- ASV_df_prop %>%
  left_join(metadata_df) %>%
  filter(Experiment %in% c("Tea", "Tea (Fungi)"),
         ControlSample=="sample",
         DayN %in% c(8, 10)) %>%
  select(Experiment, SampleID, ASVID, Abundance) %>%
  split(.$Experiment) %>%
  map(~spread(.x, ASVID, Abundance)) %>%
  map(~select(.x, -Experiment)) %>%
  map(~column_to_rownames(.x, var="SampleID"))

set.seed(021324)
map2(bray_by_plant_data, bray_by_plant_asvs, ~adonis2(formula=.y~nPlants+Plant, data=.x, permutations=999, method="bray"))
## $Tea
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ nPlants + Plant, data = .x, permutations = 999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     2  0.41983 0.35997 3.6558   0.05 *
## Residual 13  0.74646 0.64003                
## Total    15  1.16629 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Tea (Fungi)`
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ nPlants + Plant, data = .x, permutations = 999, method = "bray")
##          Df   SumOfSqs      R2      F Pr(>F)
## Model     2 0.00003613 0.01702 0.1126   0.93
## Residual 13 0.00208642 0.98298              
## Total    15 0.00212255 1.00000

Number of plants not significant, so it was removed.

set.seed(021324)
map2(bray_by_plant_data, bray_by_plant_asvs, ~adonis2(formula=.y~Plant, data=.x, permutations=999, method="bray"))
## $Tea
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ Plant, data = .x, permutations = 999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     2  0.41983 0.35997 3.6558   0.05 *
## Residual 13  0.74646 0.64003                
## Total    15  1.16629 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Tea (Fungi)`
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ Plant, data = .x, permutations = 999, method = "bray")
##          Df   SumOfSqs      R2      F Pr(>F)
## Model     2 0.00003613 0.01702 0.1126   0.93
## Residual 13 0.00208642 0.98298              
## Total    15 0.00212255 1.00000
#kimchi and 2 teas
bray_by_plant_data  <- metadata_df %>%
  filter(Experiment %in% c("Kimchi", "Tea", "Tea (Fungi)"),
         ControlSample=="sample",
         DayN %in% c(8, 10),
         Plant!="greenblack") %>%
  column_to_rownames(var="SampleID") %>%
  split(.$Experiment)

bray_by_plant_asvs <- ASV_df_prop %>%
  left_join(metadata_df) %>%
  filter(Experiment %in% c("Kimchi", "Tea", "Tea (Fungi)"),
         ControlSample=="sample",
         DayN %in% c(8, 10),
         Plant!="greenblack") %>%
  select(Experiment, SampleID, ASVID, Abundance) %>%
  split(.$Experiment) %>%
  map(~spread(.x, ASVID, Abundance)) %>%
  map(~select(.x, -Experiment)) %>%
  map(~column_to_rownames(.x, var="SampleID"))

set.seed(021324)
map2(bray_by_plant_data, bray_by_plant_asvs, ~adonis2(formula=.y~Plant, data=.x, permutations=999, method="bray"))
## $Kimchi
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ Plant, data = .x, permutations = 999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1  0.62055 0.20049 5.0154  0.001 ***
## Residual 20  2.47461 0.79951                  
## Total    21  3.09516 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Tea
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ Plant, data = .x, permutations = 999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1  0.37947 0.47689 7.2931  0.042 *
## Residual  8  0.41625 0.52311                
## Total     9  0.79573 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Tea (Fungi)`
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ Plant, data = .x, permutations = 999, method = "bray")
##          Df   SumOfSqs      R2      F Pr(>F)
## Model     1 0.00002917 0.01558 0.1267  0.772
## Residual  8 0.00184244 0.98442              
## Total     9 0.00187161 1.00000

Figure 5: Impacts of substrate type on diversity

plot_grid(substrate_alpha_plot, substrate_beta_plot, nrow = 2, rel_heights = c(0.8, 1), labels = c("A", "B"))

#ggsave(filename = file.path(figure_out, paste(Sys.Date(), "Figure5_substrate_figure.png", sep = "_")))

Succession over time

pH over time

statistical tests

#Chow chow and tea 
#kruskall wallis
pHkruskalls <- pH_df %>%
  filter(Experiment=="Chow"|Experiment=="Tea",
         ControlSample=="sample") %>%
  with_groups(JarID, mutate, DayO=dense_rank(Day)) %>%
  group_by(Experiment) %>%
  kruskal_test(pH~DayO)
pHkruskalls
## # A tibble: 2 × 7
##   Experiment .y.       n statistic    df        p method        
## * <chr>      <chr> <int>     <dbl> <int>    <dbl> <chr>         
## 1 Chow       pH       71      47.6     2 4.67e-11 Kruskal-Wallis
## 2 Tea        pH       64      57.8     3 1.7 e-12 Kruskal-Wallis
#dunns posthoc
pH_df %>%
  filter(Experiment=="Chow"|Experiment=="Tea",
         ControlSample=="sample") %>%
  with_groups(JarID, mutate, DayO=dense_rank(Day)) %>%
  group_by(Experiment) %>%
  dunn_test(pH~DayO, p.adjust.method = "BH")
## # A tibble: 9 × 10
##   Experiment .y.   group1 group2    n1    n2 statistic        p    p.adj
## * <chr>      <chr> <chr>  <chr>  <int> <int>     <dbl>    <dbl>    <dbl>
## 1 Chow       pH    1      2         25    24     -4.95 7.28e- 7 1.09e- 6
## 2 Chow       pH    1      3         25    22     -6.59 4.53e-11 1.36e-10
## 3 Chow       pH    2      3         24    22     -1.73 8.43e- 2 8.43e- 2
## 4 Tea        pH    1      2         16    16     -2.50 1.23e- 2 1.85e- 2
## 5 Tea        pH    1      3         16    16     -4.85 1.21e- 6 3.64e- 6
## 6 Tea        pH    1      4         16    16     -7.23 4.74e-13 2.85e-12
## 7 Tea        pH    2      3         16    16     -2.35 1.87e- 2 1.87e- 2
## 8 Tea        pH    2      4         16    16     -4.73 2.25e- 6 4.49e- 6
## 9 Tea        pH    3      4         16    16     -2.38 1.74e- 2 1.87e- 2
## # ℹ 1 more variable: p.adj.signif <chr>
#kimchi 
kimchipHinput <- pH_df %>%
  filter(Experiment=="Kimchi",
         ControlSample=="sample") %>%
  with_groups(JarID, mutate, DayO=dense_rank(Day)) %>%
  select(JarID, pH, DayO) %>%
  spread(DayO, pH)
kimchipHwilcox <- wilcox.test(kimchipHinput$`1`, kimchipHinput$`2`,  paired = TRUE, alternative = "greater")
kimchipHwilcox
## 
##  Wilcoxon signed rank exact test
## 
## data:  kimchipHinput$`1` and kimchipHinput$`2`
## V = 253, p-value = 2.384e-07
## alternative hypothesis: true location shift is greater than 0
kimchipHwilcox$p.value
## [1] 2.384186e-07
pH_pvalues <- tribble(~Experiment, ~p.value,
                      "Chow chow", pHkruskalls$p[pHkruskalls$Experiment=="Chow"],
                      "Kimchi", kimchipHwilcox$p.value,
                      "Kombucha", pHkruskalls$p[pHkruskalls$Experiment=="Tea"]) %>%
  mutate(p.value=formatC(p.value, format = "e", digits = 2))

pH_pgroups <- tribble(~Experiment, ~Day, ~label,
                      "Chow chow", 0, "a",
                      "Chow chow", 3, "b",
                      "Chow chow", 10, "b",
                      "Kimchi", 3, "a",
                      "Kimchi", 10, "b",
                      "Kombucha", 0, "a",
                      "Kombucha", 4, "b",
                      "Kombucha", 8, "c",
                      "Kombucha", 21, "d")

Plot

pH_day_plot <- pH_df %>%
  add_row(Experiment="Kimchi", pH=NULL, Day=0, ControlSample="sample") %>% # add fake row to make chow chow and kimchi x-axis scales the same
  mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
           ControlSample=factor(ControlSample, levels=c("sample", "brine", "starter"))) %>%
  ggplot(aes(x=Day, y=pH)) +
  geom_line(aes(group=JarID), color="gray") +
  geom_point(aes(color=ControlSample), alpha=0.6) +
  geom_text(data=pH_pgroups, aes(x=Day, y=7.6, label=label)) +
  geom_text(data = pH_pvalues, aes(x=0, y=0.25, label=paste("p = ", p.value)), hjust="left") +
  okabe_ito_colors +
  scale_x_continuous(n.breaks = 6) +
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8), labels = c(0, 2, 4, 6, 8), limits = c(0,8)) +
  facet_wrap(~Experiment, nrow=1, scales="free_x") +
  theme(axis.text = element_text(size=11), 
        legend.title = element_text(size=15), 
        legend.text = element_text(size=13),
        panel.grid.minor = element_blank()) +
  labs(x="Day", y="pH", color="Sample type")
  
pH_day_plot

Alpha Diversity

Does alpha diversity increase as the number of days elapsed increases?
+ Test with Wilcoxon Rank-Sum test, comparing raw richness and Shannon index between 3 and 10 days (4 and 8 for tea experiment)

Run paired wilcoxon test

diversity_df %>%
  with_groups(JarID, filter, n_distinct(SampleID)=="2"|n_distinct(SampleID)==4) %>% # remove samples that don't have two timepoint measurements
  with_groups(Experiment, mutate, DayO=dense_rank(DayN)) %>%
  mutate(DayO=factor(DayO, levels=c(1, 2), labels=c("First", "Second"))) %>%
  select(JarID, Experiment, DayO, richness_value, shannon_value) %>%
  gather("measure", "value", c(richness_value, shannon_value)) %>%
  spread(DayO, value) %>%
  split(list(.$measure, .$Experiment)) %>%
  map(~wilcox.test(.x$Second, .x$First,  paired = TRUE, alternative = "greater")) %>%
  map(tidy) %>%
  map2(., names(.), ~mutate(.x, x=.y)) %>%
  purrr::reduce(full_join) %>%
  separate(x, into=c("measure", "Experiment"), sep="\\.") %>%
  select(measure, Experiment, statistic, p.value) %>%
  arrange(measure) %>%
  group_by(Experiment) %>%
  adjust_pvalue(p.col="p.value", method = "BH")
## # A tibble: 8 × 5
##   Experiment  measure        statistic p.value p.value.adj
##   <chr>       <chr>              <dbl>   <dbl>       <dbl>
## 1 Chow        richness_value     120.   0.167        0.333
## 2 Chow        shannon_value      124    0.538        0.538
## 3 Kimchi      richness_value     175    0.0593       0.119
## 4 Kimchi      shannon_value      100    0.806        0.806
## 5 Tea         richness_value      75.5  0.0780       0.156
## 6 Tea         shannon_value       33    0.983        0.983
## 7 Tea (Fungi) richness_value      17.5  0.294        0.294
## 8 Tea (Fungi) shannon_value       91    0.259        0.294
richness_day_plot <- diversity_df %>%
  with_groups(JarID, filter, n_distinct(SampleID)=="2"|n_distinct(SampleID)==4) %>% # remove samples that don't have two timepoint measurements
  mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
         DayN=as.character(DayN),
         DayN=factor(DayN, levels=c("3", "4", "8","10"))) %>%
  ggplot(aes(x=DayN, y=richness_value)) +
  geom_line(aes(group=JarID), color="gray") +
  geom_point(alpha=0.5) +
  scale_x_discrete(drop=TRUE) +
  facet_wrap(~Experiment, scales = "free", nrow=1) +
  scale_y_continuous(limits = c(0, NA)) +
  theme(axis.text.x = element_blank(),
        axis.text = element_text(size=11),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()) +
  labs(x=NULL, y="Richness")

shannon_day_plot <- diversity_df %>%
  with_groups(JarID, filter, n_distinct(SampleID)=="2"|n_distinct(SampleID)==4) %>% # remove samples that don't have two timepoint measurements
  mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
         DayN=as.character(DayN),
         DayN=factor(DayN, levels=c("3", "4", "8","10"))) %>%
  ggplot(aes(x=DayN, y=shannon_value)) +
  geom_line(aes(group=JarID), color="gray") +
  geom_point(alpha=0.5) +
  scale_x_discrete(drop=TRUE) +
  facet_wrap(~Experiment, scales = "free", nrow = 1) +
  scale_y_continuous(limits = c(0, NA)) +
  theme(strip.text.x = element_blank(),
        axis.text = element_text(size=11),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()) +
  labs(x="Day", y="Shannon index")

succession_alpha_plot <- plot_grid(richness_day_plot, shannon_day_plot, nrow=2, align = "v", axis="lr")
succession_alpha_plot

Beta diversity

plot

set.seed(123)

nmds <- ASV_df_prop %>%
  select(SampleID, Experiment, ASVID, Abundance) %>%
  split(.$Experiment) %>%
  map(~select(.x, -Experiment)) %>%
  map(~spread(.x, ASVID, Abundance)) %>%
  map(~column_to_rownames(.x, var = "SampleID")) %>%
  map(as.matrix) %>%
  map(~metaMDS(.x, distance = "bray"))
## Run 0 stress 0.1774703 
## Run 1 stress 0.2146611 
## Run 2 stress 0.1944882 
## Run 3 stress 0.1759535 
## ... New best solution
## ... Procrustes: rmse 0.08530666  max resid 0.5111269 
## Run 4 stress 0.1962391 
## Run 5 stress 0.17615 
## ... Procrustes: rmse 0.09668807  max resid 0.5841405 
## Run 6 stress 0.1751244 
## ... New best solution
## ... Procrustes: rmse 0.1005305  max resid 0.6174738 
## Run 7 stress 0.1759353 
## Run 8 stress 0.1759481 
## Run 9 stress 0.1957706 
## Run 10 stress 0.2093671 
## Run 11 stress 0.1759983 
## Run 12 stress 0.1861465 
## Run 13 stress 0.3929351 
## Run 14 stress 0.1957543 
## Run 15 stress 0.1947856 
## Run 16 stress 0.2015187 
## Run 17 stress 0.1759843 
## Run 18 stress 0.2023919 
## Run 19 stress 0.1753271 
## ... Procrustes: rmse 0.008028507  max resid 0.03546564 
## Run 20 stress 0.1774703 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      5: no. of iterations >= maxit
##     15: stress ratio > sratmax
## Run 0 stress 0.1460225 
## Run 1 stress 0.160676 
## Run 2 stress 0.1460225 
## ... New best solution
## ... Procrustes: rmse 7.387893e-06  max resid 3.19048e-05 
## ... Similar to previous best
## Run 3 stress 0.207423 
## Run 4 stress 0.180311 
## Run 5 stress 0.1614875 
## Run 6 stress 0.191577 
## Run 7 stress 0.1462274 
## ... Procrustes: rmse 0.006019354  max resid 0.0297096 
## Run 8 stress 0.1696063 
## Run 9 stress 0.1930495 
## Run 10 stress 0.1565812 
## Run 11 stress 0.176654 
## Run 12 stress 0.1943849 
## Run 13 stress 0.173934 
## Run 14 stress 0.173934 
## Run 15 stress 0.177754 
## Run 16 stress 0.1460225 
## ... New best solution
## ... Procrustes: rmse 1.263504e-05  max resid 4.57652e-05 
## ... Similar to previous best
## Run 17 stress 0.1460225 
## ... New best solution
## ... Procrustes: rmse 1.177535e-05  max resid 5.474131e-05 
## ... Similar to previous best
## Run 18 stress 0.2239994 
## Run 19 stress 0.1620144 
## Run 20 stress 0.1622134 
## *** Best solution repeated 1 times
## Run 0 stress 0.02536552 
## Run 1 stress 0.05379719 
## Run 2 stress 0.02536552 
## ... Procrustes: rmse 1.394032e-05  max resid 3.923488e-05 
## ... Similar to previous best
## Run 3 stress 0.05302885 
## Run 4 stress 0.02536552 
## ... New best solution
## ... Procrustes: rmse 1.439869e-05  max resid 3.26021e-05 
## ... Similar to previous best
## Run 5 stress 0.05302885 
## Run 6 stress 0.02536553 
## ... Procrustes: rmse 2.613274e-05  max resid 6.407172e-05 
## ... Similar to previous best
## Run 7 stress 0.08879125 
## Run 8 stress 0.05302891 
## Run 9 stress 0.08879126 
## Run 10 stress 0.09127608 
## Run 11 stress 0.09594028 
## Run 12 stress 0.02536552 
## ... Procrustes: rmse 2.191914e-05  max resid 4.969234e-05 
## ... Similar to previous best
## Run 13 stress 0.02536553 
## ... Procrustes: rmse 3.079821e-05  max resid 7.090852e-05 
## ... Similar to previous best
## Run 14 stress 0.09706119 
## Run 15 stress 0.0530289 
## Run 16 stress 0.05302881 
## Run 17 stress 0.0676082 
## Run 18 stress 0.09991109 
## Run 19 stress 0.02536552 
## ... New best solution
## ... Procrustes: rmse 7.703706e-06  max resid 2.204142e-05 
## ... Similar to previous best
## Run 20 stress 0.09043775 
## *** Best solution repeated 1 times
## Run 0 stress 0.007347678 
## Run 1 stress 0.009895093 
## Run 2 stress 0.01051935 
## Run 3 stress 0.008798682 
## Run 4 stress 0.009623193 
## Run 5 stress 0.01150947 
## Run 6 stress 0.01119637 
## Run 7 stress 0.00850877 
## Run 8 stress 0.00864702 
## Run 9 stress 0.01302275 
## Run 10 stress 0.008439507 
## Run 11 stress 0.007494895 
## ... Procrustes: rmse 0.01681193  max resid 0.06642044 
## Run 12 stress 0.009389426 
## Run 13 stress 0.01195031 
## Run 14 stress 0.0110265 
## Run 15 stress 0.007460751 
## ... Procrustes: rmse 0.01627929  max resid 0.06412957 
## Run 16 stress 0.01170512 
## Run 17 stress 0.00966996 
## Run 18 stress 0.01077525 
## Run 19 stress 0.01039775 
## Run 20 stress 0.01159429 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: no. of iterations >= maxit
data.scores <- nmds %>%
  map(~scores(.x)$sites) %>%
  map(as.data.frame) %>%
  map(~rownames_to_column(.x, var="SampleID")) %>%
  map2(., names(.), ~mutate(.x, Experiment=.y)) %>%
  purrr::reduce(full_join) %>%
  left_join(metadata_df) %>%
  mutate(DayN=as.character(DayN)) %>%
  replace_na(list(DayN="NA")) %>%
    mutate(Experiment=factor(Experiment, levels=c("Chow", "Kimchi", "Tea", "Tea (Fungi)"), labels=c("Chow chow", "Kimchi", "Kombucha", "Kombucha (fungi)")),
         DayN=factor(DayN, levels=c("NA", "3", "4", "8", "10")),
         ControlSample=factor(ControlSample, levels=c("sample", "brine", "starter", "scoby"), labels=c("sample", "brine", "starter", "commercial scoby")))

succession_beta_plot <- data.scores %>%
  ggplot(aes(x=NMDS1, y=NMDS2, color=DayN, shape=ControlSample)) +
  geom_point() +
  facet_wrap(~Experiment, scales = "free") +
  okabe_ito_colors +
  theme(axis.text = element_text(size=11), 
        legend.title = element_text(size=15), 
        legend.text = element_text(size=13),
        panel.grid.minor = element_blank()) +
  labs(color="Day", shape="Sample type")
succession_beta_plot

bray_by_day_data  <- metadata_df %>%
  filter(ControlSample=="sample") %>%
  column_to_rownames(var="SampleID") %>%
  split(.$Experiment)

bray_by_day_asvs <- ASV_df_prop %>%
  left_join(metadata_df) %>%
  filter(ControlSample=="sample") %>%
  select(Experiment, SampleID, ASVID, Abundance) %>%
  split(.$Experiment) %>%
  map(~spread(.x, ASVID, Abundance)) %>%
  map(~select(.x, -Experiment)) %>%
  map(~column_to_rownames(.x, var="SampleID"))

set.seed(021324)
map2(bray_by_day_data, bray_by_day_asvs, ~adonis2(formula=.y~Day, data=.x, permutations=999, method="bray"))
## $Chow
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ Day, data = .x, permutations = 999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.5867 0.06814 3.0713  0.007 **
## Residual 42   8.0239 0.93186                 
## Total    43   8.6106 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Kimchi
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ Day, data = .x, permutations = 999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.6929 0.07113 3.2162  0.009 **
## Residual 42   9.0488 0.92887                 
## Total    43   9.7417 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Tea
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ Day, data = .x, permutations = 999, method = "bray")
##          Df SumOfSqs       R2       F Pr(>F)
## Model     1 -0.00059 -0.00032 -0.0096  0.963
## Residual 30  1.84885  1.00032               
## Total    31  1.84826  1.00000               
## 
## $`Tea (Fungi)`
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = .y ~ Day, data = .x, permutations = 999, method = "bray")
##          Df  SumOfSqs      R2      F Pr(>F)
## Model     1 0.0000233 0.00712 0.2152  0.689
## Residual 30 0.0032499 0.99288              
## Total    31 0.0032732 1.00000

Figure S3: Succession over time

plot_grid(pH_day_plot, succession_alpha_plot, succession_beta_plot, ncol=1, labels = c("A", "B", "C"), rel_heights = c(1, 1.4, 1.9))

#ggsave(filename = file.path(figure_out, paste(Sys.Date(), "FigureS3_succession_figure.png", sep="_")))

Indicator Taxa Analysis

  • Measure indicator correlation indices of ASVs for variables with significant impact on bacterial community based on PERMANOVA results
  • Test ASVs that are present in ≥ 10% samples per experiment
  • Use the indicspecies package for this analysis (De Caceres et al., 2023)
#10% of samples
sample10 <- metadata_df %>%
  count(Experiment) %>%
  mutate(n10=n/10)
sample10
## # A tibble: 4 × 3
##   Experiment      n   n10
##   <chr>       <int> <dbl>
## 1 Chow           45   4.5
## 2 Kimchi         44   4.4
## 3 Tea            35   3.5
## 4 Tea (Fungi)    34   3.4

Day

#significant differences in communities by day in Kimchi and Chow chow samples
indvday0 <- ASV_df_prop %>%
  with_groups(c(Experiment, ASVID), filter, sum(Abundance>0)>=4) %>% # in 10% of samples
  with_groups(c(Experiment, ASVID), filter, mean(Abundance)>=0.001) %>% #average abundance of 0.1%
  left_join(metadata_df[,c("SampleID", "Day", "ControlSample")]) %>%
  filter(ControlSample=="sample", Experiment %in% c("Chow", "Kimchi")) %>%
  select(SampleID, Experiment, Day, ASVID, Abundance) %>%
  split(.$Experiment) %>%
  map(~spread(.x, ASVID, Abundance))

indvday_asv <- indvday0 %>%
  map(~select(.x, -Day,  -Experiment)) %>%
  map(~column_to_rownames(.x, var="SampleID"))

indvday_groups <- indvday0 %>%
  map(~.x$Day)

set.seed(0123)    
indval_day <- map2(indvday_asv, indvday_groups, ~multipatt(.x, .y, control = how(nperm=999))) 

indval_day_tables <- list(indval_day$Chow$sign, indval_day$Kimchi$sign) %>%
  map2(., c("Chow", "Kimchi"), ~mutate(.x, Experiment=.y)) %>%
  map(~rownames_to_column(.x, var="ASVID")) %>%
  map(~adjust_pvalue(data=.x, p.col="p.value", method = "fdr")) %>%
  map(~left_join(.x, unique(ASV_df_prop[,c("ASVID", "Phylum", "Class", "Order", "Family", "Genus", "Species")])))
names(indval_day_tables) <- c("Chow", "Kimchi")

#significant Chow
indval_day_tables$Chow %>%
  filter(p.value.adj<0.05) %>%
   mutate(Day=case_when(index==1~"ten", 
                       index==2~"three")) %>%
  select(Experiment, Day, ASVID, Family, Genus, Species, stat, p.value, p.value.adj) %>%
  arrange(Day, p.value.adj)
## [1] Experiment  Day         ASVID       Family      Genus       Species    
## [7] stat        p.value     p.value.adj
## <0 rows> (or 0-length row.names)
#significant Tea
indval_day_tables$Kimchi %>%
  filter(p.value.adj<0.05) %>%
  mutate(Day=case_when(index==1~"ten", 
                       index==2~"three")) %>%
  select(Experiment, Day, ASVID, Family, Genus, Species, stat, p.value, p.value.adj) %>%
  arrange(Day, p.value.adj)
## [1] Experiment  Day         ASVID       Family      Genus       Species    
## [7] stat        p.value     p.value.adj
## <0 rows> (or 0-length row.names)

No taxa are significantly associated with days.

Substrate

#significant differences in communities by substrate in Kimchi and Tea samples
indvplant0 <- ASV_df_prop %>%
  left_join(metadata_df[,c("SampleID", "Day", "Plant",  "ControlSample")]) %>%
  filter(ControlSample=="sample", Experiment %in% c("Kimchi", "Tea"), Day %in% c("eight", "ten")) %>%
  group_by(ASVID) %>%
  filter(case_when(Experiment=="Kimchi"~sum(Abundance>0)>=4,
                   Experiment=="Tea"~sum(Abundance>0)>=3)) %>% # in 10% of samples
  with_groups(c(Experiment, ASVID), filter, mean(Abundance)>=0.001) %>% # average of 0.1% abundance
  select(SampleID, Experiment, Plant, ASVID, Abundance) %>%
  split(.$Experiment) %>%
  map(~spread(.x, ASVID, Abundance))

indvplant_asv <- indvplant0 %>%
  map(~select(.x, -Plant,  -Experiment)) %>%
  map(~column_to_rownames(.x, var="SampleID"))

indvplant_groups <- indvplant0 %>%
  map(~.x$Plant)
  
set.seed(0123)  
indval_plant <- map2(indvplant_asv, indvplant_groups, ~multipatt(.x, .y, control = how(nperm=999))) 

indval_plant_tables <- list(indval_plant$Kimchi$sign, indval_plant$Tea$sign) %>%
  map2(., c("Kimchi", "Tea"), ~mutate(.x, Experiment=.y)) %>%
  map(~rownames_to_column(.x, var="ASVID")) %>%
  map(~adjust_pvalue(data=.x, p.col="p.value", method = "fdr")) %>%
  map(~left_join(.x, unique(ASV_df_prop[,c("ASVID", "Phylum", "Class", "Order", "Family", "Genus", "Species")])))
names(indval_plant_tables) <- c("Kimchi", "Tea")

#significant Kimchi
indKimchiASVs <- indval_plant_tables$Kimchi %>%
  filter(p.value.adj<0.05) %>%
  .$ASVID

indKimchiAbs <- ASV_df_prop %>%
  filter(ASVID %in% indKimchiASVs, Experiment=="Kimchi") %>%
  left_join(metadata_df[,c("SampleID", "Plant")]) %>%
  with_groups(c(ASVID, Plant), summarize, mean=round(mean(Abundance)*100, 2)) %>%
  spread(Plant, mean)

indval_kimchi_table <- indval_plant_tables$Kimchi %>%
  filter(p.value.adj<0.05) %>%
  left_join(indKimchiAbs) %>%
  mutate(Substrate=case_when(index==1~"Cabbage", 
                             index==2~"Radish"),
         stat=round(stat, 3),
         p.value.adj=round(p.value.adj, 3)) %>%
  arrange(Substrate, Family, Genus, Species) %>%
  mutate(` `=row_number()) %>%
  select(` `, Family, Genus, Species, stat, p.value, p.value.adj, Cabbage, Radish) %>%
  dplyr::rename(`p value`=p.value,
                `q value`=p.value.adj,
                `Mean\ncabbage\nabund. (%)`=Cabbage,
                `Mean\nradish\nabund. (%)`=Radish) %>%
  replace_na(list(Genus="", Species="")) %>%
  kableExtra::kable(caption="Significant Indicator ASVs in Kimchi", align = "l") %>%
  row_spec(0, bold=TRUE) %>%
  kable_classic(html_font = "Arial", full_width=FALSE) %>%
  pack_rows("Cabbage", 1, 11, bold = TRUE) %>%
  pack_rows("Radish", 12, 13, bold = TRUE)
indval_kimchi_table
Significant Indicator ASVs in Kimchi
Family Genus Species stat p value q value Mean cabbage abund. (% |Mean radish abund.
Cabbage
1 Acetobacteraceae Komagataeibacter 0.938 0.001 0.003 0.54 0.08
2 Lactobacillaceae 0.866 0.007 0.015 0.54 0.04
3 Lactobacillaceae 0.901 0.002 0.006 0.94 0.08
4 Lactobacillaceae 0.911 0.001 0.003 1.75 0.11
5 Lactobacillaceae 0.969 0.001 0.003 2.70 0.13
6 Leuconostocaceae Leuconostoc mesenteroides 0.980 0.001 0.003 4.88 0.07
7 Leuconostocaceae Leuconostoc 0.878 0.007 0.015 1.08 0.04
8 Leuconostocaceae Weissella viridescens 0.816 0.006 0.015 0.61 0.00
9 Leuconostocaceae Weissella viridescens 0.993 0.001 0.003 5.30 0.07
10 Leuconostocaceae Weissella viridescens 0.975 0.001 0.003 2.64 0.13
11 Streptococcaceae Lactococcus lactis 0.971 0.026 0.048 0.85 0.07
Radish
12 Erwiniaceae Pantoea vagans 0.906 0.019 0.038 0.68 1.47
13 Lactobacillaceae Latilactobacillus curvatus 0.977 0.001 0.003 0.49 6.20
#save_kable(indval_kimchi_table, zoom=5, file = file.path(figure_out, paste0(Sys.Date(), "Table1_indval_table.png")))


#significant Tea
indval_plant_tables$Tea %>%
  filter(p.value.adj<0.05) %>%
  mutate(Substrate=case_when(index==1~"Black",
                             index==2~"Green",
                             index==3~"Green & Black")) %>%
  select(Substrate, Family, Genus, Species, stat, p.value, p.value.adj) %>%
  arrange(Substrate, p.value.adj)
## [1] Substrate   Family      Genus       Species     stat        p.value    
## [7] p.value.adj
## <0 rows> (or 0-length row.names)

11 ASVs associated with cabbage and 2 ASVs associated with radish. 3 of the cabbage-assicaited ASVs were Weissella viridescens.

Figure S2: Relative abundances of significant indicator ASVs

indASVorderDF <- indval_plant_tables$Kimchi %>%
  filter(p.value.adj<0.05) %>%
  mutate(Substrate=case_when(index==1~"Cabbage", 
                             index==2~"Radish")) %>%
  arrange(Substrate, Family, Genus, Species) %>%
  replace_na(list(Species="")) %>%
  mutate(rown=row_number(),
         Family=case_when(!is.na(Family)~paste0("f_", Family)),
         Genus=coalesce(Genus, Family),                
         Species2=paste(rown, Genus, Species)) %>%
  select(ASVID, Species2)

ASV_df_prop %>%
  filter(Experiment=="Kimchi") %>%
  filter(ASVID %in% indKimchiASVs) %>%
  left_join(metadata_df[,c("SampleID", "Plant")]) %>%
  left_join(indASVorderDF) %>%
  mutate(Species2=factor(Species2, levels=indASVorderDF$Species2)) %>%
  ggplot(aes(x=Plant, y=Abundance)) +
  geom_boxplot(alpha=0) +
  geom_beeswarm(alpha=0.4, cex = 5, corral = "wrap") + 
  facet_wrap(~Species2, scales = "free_y",  nrow=3) + 
  theme(axis.text.x = element_text(size=11),
        strip.text.x = element_text(size=9),
        panel.grid.minor = element_blank()) +
  labs(x=NULL)

#ggsave(filename = file.path(figure_out, paste(Sys.Date(), "FigureS2_indval_abundances.png", sep="_")))

Session Info

## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] magick_2.8.5        ggtext_0.1.2        cowplot_1.1.3      
##  [4] ggbeeswarm_0.7.2    kableExtra_1.4.0    formattable_0.2.1  
##  [7] indicspecies_1.7.15 rstatix_0.7.2       vegan_2.6-8        
## [10] lattice_0.22-6      permute_0.9-7       lubridate_1.9.3    
## [13] forcats_1.0.0       stringr_1.5.1       dplyr_1.1.4        
## [16] purrr_1.0.2         readr_2.1.5         tidyr_1.3.1        
## [19] tibble_3.2.1        ggplot2_3.5.1       tidyverse_2.0.0    
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1  viridisLite_0.4.2 farver_2.1.2      vipor_0.4.7      
##  [5] fastmap_1.2.0     webshot2_0.1.1    promises_1.3.2    digest_0.6.36    
##  [9] timechange_0.3.0  lifecycle_1.0.4   cluster_2.1.6     processx_3.8.4   
## [13] magrittr_2.0.3    compiler_4.4.1    rlang_1.1.4       sass_0.4.9       
## [17] tools_4.4.1       utf8_1.2.4        yaml_2.3.10       knitr_1.48       
## [21] labeling_0.4.3    htmlwidgets_1.6.4 bit_4.0.5         xml2_1.3.6       
## [25] websocket_1.4.2   abind_1.4-8       withr_3.0.1       grid_4.4.1       
## [29] fansi_1.0.6       colorspace_2.1-1  scales_1.3.0      MASS_7.3-60.2    
## [33] cli_3.6.3         rmarkdown_2.27    crayon_1.5.3      generics_0.1.3   
## [37] rstudioapi_0.16.0 tzdb_0.4.0        commonmark_1.9.2  chromote_0.4.0   
## [41] cachem_1.1.0      splines_4.4.1     parallel_4.4.1    vctrs_0.6.5      
## [45] Matrix_1.7-0      jsonlite_1.8.8    carData_3.0-5     car_3.1-3        
## [49] hms_1.1.3         bit64_4.0.5       Formula_1.2-5     beeswarm_0.4.0   
## [53] systemfonts_1.1.0 jquerylib_0.1.4   glue_1.7.0        ps_1.7.7         
## [57] stringi_1.8.4     gtable_0.3.5      later_1.4.1       munsell_0.5.1    
## [61] pillar_1.9.0      htmltools_0.5.8.1 R6_2.5.1          vroom_1.6.5      
## [65] evaluate_0.24.0   markdown_1.13     highr_0.11        backports_1.5.0  
## [69] gridtext_0.1.5    broom_1.0.6       bslib_0.8.0       Rcpp_1.0.14      
## [73] svglite_2.1.3     nlme_3.1-164      mgcv_1.9-1        xfun_0.46        
## [77] pkgconfig_2.0.3